In [1]:
import pandas as pd
import altair as alt
In [2]:
alt.data_transformers.disable_max_rows() # Disable 5_000 rows limit
data = pd.read_csv("https://github.com/Krow10/code4rena-scraper/raw/master/leaderboard_code4rena.csv") # Set path accordingly
In [3]:
data
Out[3]:
period handle is_team prize_money total_reports high_all high_solo med_all med_solo gas_all
0 LAST 60 DAYS Soosh False 70985.01 1 0 0 1 1 0
1 LAST 60 DAYS HE1M False 40620.85 20 6 0 11 1 0
2 LAST 60 DAYS codehacker False 24571.73 1 0 0 1 0 0
3 LAST 60 DAYS Trust False 22909.46 31 9 0 19 3 0
4 LAST 60 DAYS cccz False 20523.95 17 5 1 10 2 0
... ... ... ... ... ... ... ... ... ... ...
2286 ALL TIME alexxander False 0.00 1 0 0 1 0 0
2287 ALL TIME navinavu False 0.00 1 0 0 1 0 0
2288 ALL TIME Kumpirmafyas False 0.00 2 0 0 0 0 0
2289 ALL TIME 0xMesaj False 0.00 1 0 0 0 0 0
2290 ALL TIME ad3sh_ False 0.00 2 0 0 0 0 0

2291 rows × 10 columns

In [4]:
plt_data = data.copy()

qa_all¶

Assumes missing reports from total_reports are QA reports

In [5]:
plt_data["qa_all"] = plt_data["total_reports"] - (plt_data["high_all"] + plt_data["med_all"] + plt_data["gas_all"])
In [6]:
plt_data = plt_data.melt(id_vars=["period", "prize_money"], value_vars=list(plt_data.columns[3:]), var_name="type")
In [7]:
plt_data["type"] = plt_data["type"].map(
    {
        'gas_all': 'Gas optimization', 
        'high_all': 'Common high',
        'high_solo': 'Unique high',
        'med_all': 'Common medium',
        'med_solo': 'Unique medium',
        'qa_all': 'QA',
        'total_reports': 'Total reports'
    }
)

Period dropdown¶

Filter from which period the data should be taken. These are the same options as the leaderboard entries from the Code4rena website.

In [8]:
period_dropdown = alt.binding_select(options=list(plt_data["period"].unique()), name='Select a period:')
period_selector = alt.selection_single(
    fields=['period'], 
    bind=period_dropdown, 
    name="period_selector", 
    init={'period':'ALL TIME'}
)
In [9]:
ordered_legend_reports_labels = [
    'Unique high',
    'Common high',
    'Unique medium',
    'Common medium',
    'Gas optimization', 
    'QA',
    'Total reports'
]
label_colors = ["#FE266D","#FA6C44","#F2E713","#D1D811","#0AB6F8","#5688C1","#AA3678"]
In [10]:
base = alt.Chart(plt_data).transform_calculate(
    order="{ \
    'Total reports': 0, \
    'QA': 1, \
    'Gas optimization': 2, \
    'Common medium': 3, \
    'Unique medium': 4, \
    'Common high': 5, \
    'Unique high': 6}[datum.risk]"
).mark_circle().encode(
    x=alt.X('value:Q', scale=alt.Scale(type='symlog'), title='Number of reports'),
    y=alt.Y('prize_money:Q', title='Prize money ($USD)', scale=alt.Scale(domainMin=0)),
    color=alt.Color(
        'type:N', 
        title='Report type',
        scale=alt.Scale(domain=ordered_legend_reports_labels, range=label_colors),
        legend=alt.Legend(
            title='Reports type', 
            values=ordered_legend_reports_labels, 
            orient="top",
            labelFontSize=14,
            symbolStrokeWidth=10
        )
    ),
    order='order:O'
).properties(
    width=850,
    height=400
).add_selection(
    period_selector
).transform_filter(
    period_selector
)

Correlation between the prize money and number of reports by severity¶

For a given period, shows the correlation (via polynomial regression) between the number of reports and the prize money of a warden for each report category.

Note that the x-axis is a logarithmic scale.

In [11]:
base.transform_regression(
    'value', 
    'prize_money', 
    method='poly', 
    groupby=['type']
).mark_line().configure_axis(titleFontSize=14, labelFontSize=12)
/opt/hostedtoolcache/Python/3.11.1/x64/lib/python3.11/site-packages/altair/utils/core.py:317: FutureWarning: iteritems is deprecated and will be removed in a future version. Use .items instead.
  for col_name, dtype in df.dtypes.iteritems():
Out[11]:
In [12]:
df = pd.read_csv('https://raw.githubusercontent.com/code-423n4/code423n4.com/main/_data/findings/findings.csv')
In [13]:
ordered_risk_labels = ['0', '5_QA', '4_Gas optimization', '3_Low', '2_Med', '1_High']
original = ['0', 'q', 'g', '1', '2', '3']
risk_labels = dict(zip(original, ordered_risk_labels))
df['risk'] = df['risk'].map(risk_labels)
In [14]:
chart = alt.Chart(width=800, height=400).transform_filter(
    (alt.datum.split == 1) & (alt.datum.risk != '0')
).mark_bar(
).encode(
    y=alt.Y("count()", title=''),
    color=alt.Color(
        'risk:N',
        scale=alt.Scale(scheme='yelloworangered', 
        domain=ordered_risk_labels[1:])
    )
).resolve_scale(y='independent')

Round base for award money ranges¶

Set to round the award money ranges to the nearest multiple of this variable

In [15]:
round_base = 10
In [16]:
steps = []
for _risk in ordered_risk_labels[::-1]:
    awards = df[(df.risk == _risk) & (df.split == 1) & (df.awardUSD != 0)].awardUSD
    steps.append(round_base * round((awards.min() + awards.max() / awards.count()) / round_base))

Distribution of solo finding reward by severity¶

The bar graphs represents the number of award money for unique findings falling into the range indicated on the x-axis, for each report severity category.

Note that the x-axis scales are different for each graph, watch carefully.

PS: the labels are prefixed with a number only for the purpose of ordering the charts and doesn't represent the 'risk' present in the original data (i.e. high=3, med=2, low=1, etc.).

In [17]:
alt.vconcat(
  *(chart.transform_filter(
      alt.datum.risk == _risk
  ).encode(
        x=alt.X(
            "awardUSD:Q", 
            bin=alt.Bin(step=_step), 
            title='Award money ($USD)',
            axis=alt.Axis(format='$,.0f')
        ),
        tooltip=['risk:N', 'count():Q']
  ) for (_risk, _step) in list(zip(ordered_risk_labels[1:][::-1], steps))),
    data=df
)
/opt/hostedtoolcache/Python/3.11.1/x64/lib/python3.11/site-packages/altair/utils/core.py:317: FutureWarning: iteritems is deprecated and will be removed in a future version. Use .items instead.
  for col_name, dtype in df.dtypes.iteritems():
Out[17]:
In [18]:
df_contests = pd.read_csv('../contests_code4rena.csv')
df_contests.head(1)
Out[18]:
contest_report_repo contest_sponsor contest_desc start end prize_pool handle prize_money total_reports high_all high_solo med_all med_solo gas_all
0 NaN lsdnetwork-stakehouse A permissionless 3 pool liquid staking solutio... 11 November 2022 18 November 2022 75000 Uunforgiven 10860.28 10 8 3 2 0 0
In [19]:
df_github = pd.read_csv('../github_code4rena.csv')
df_github.head(1)
Out[19]:
contest contest_sponsor date handle address risk title issueId issueUrl tags issueCreation
0 94 foundation 2022/02 bobi 0x8F24B7bEe60c5421101e2e596b1008522368920F Q QA Report 70 https://github.com/code-423n4/2022-02-foundati... bug;invalid;QA (Quality Assurance);sponsor dis... 2022-03-02T21:26:45Z
In [20]:
df_merge = pd.merge(df_contests[['contest_sponsor', 'prize_pool']], df_github[['contest_sponsor', 'contest']], how='outer', on='contest_sponsor')
df_merge = df_merge.dropna().reset_index(drop=True)
df_merge['contest'] = df_merge['contest'].astype(int)
df_merge.head(1)
Out[20]:
contest_sponsor prize_pool contest
0 holograph 75000.0 170
In [21]:
df_merge = df_merge[['contest', 'prize_pool']].drop_duplicates().reset_index(drop=True)
df = pd.merge(df, df_merge[['contest', 'prize_pool']], how='outer', on='contest')
df = df[df['prize_pool'].notna()].reset_index(drop=True)
In [22]:
box_size = 25

boxplot_chart = alt.Chart(width=800, height=400).transform_filter(
    (alt.datum.split == 1) & (alt.datum.risk != '0') & (alt.datum.awardUSD != 0) # Remove '0' labels and 0 award values
).mark_boxplot(
    extent='min-max',
    size=box_size,
).encode(
    x=alt.X(
        'prize_pool:O',
        axis=alt.Axis(title='Prize pool ($USD)', titleAngle=0, labelAngle=-45)
    ),
    y=alt.Y(
        'awardUSD:Q',
        axis=alt.Axis(format='$,.0f', title='Award money ($USD)')
    ),
    color=alt.Color(
        'risk:N',
        scale=alt.Scale(scheme='yelloworangered', domain=ordered_risk_labels[1:]),
        legend=alt.Legend(title='Risk level')
    ),
)
In [23]:
count_prize_pool = alt.Chart(width=800, height=400).transform_filter(
    (alt.datum.split == 1) & (alt.datum.risk != '0') & (alt.datum.awardUSD != 0) # Remove '0' labels and 0 award values
).mark_bar(
    opacity=0.1
).encode(
    x=alt.X(
        'prize_pool:O',
        axis=alt.Axis(title='Prize pool ($USD)', titleAngle=0, labelAngle=-45)
    ),
    y=alt.Y(
        'distinct(contest):Q',
        title='Number of contests',
        axis=alt.Axis(tickMinStep=1)
    ),
    color=alt.Color(
        'risk:N',
        scale=alt.Scale(scheme='yelloworangered', domain=ordered_risk_labels[1:]),
        legend=None
    ),
    tooltip=['distinct(contest):Q']
)
#df[(df.split == 1) & (df.risk != '0') & (df.awardUSD != 0)].groupby('prize_pool')['contest'].nunique()
#count_prize_pool

Distribution of award money compared by prize pool, for each severity level¶

The boxplots (right y-axis) indicates the distribution of award money for unique findings by prize pool, for each severity report level.

The bar charts in the background (left y-axis) indicates the number of contests grouped by prize pool, meaning the size of the data from which the distributions are calculated.

Note that the y-axis scales are different for each graph.

In [24]:
alt.vconcat(
  *(alt.layer(count_prize_pool, boxplot_chart).resolve_scale(
        y='independent',
        color='independent'
    ).transform_filter(alt.datum.risk == _risk) for _risk in ordered_risk_labels[1:][::-1]),
    data=df
)
/opt/hostedtoolcache/Python/3.11.1/x64/lib/python3.11/site-packages/altair/utils/core.py:317: FutureWarning: iteritems is deprecated and will be removed in a future version. Use .items instead.
  for col_name, dtype in df.dtypes.iteritems():
Out[24]:
In [ ]: